# -*- coding: utf-8 -*-#
"""
@File : 04-聚类.py
@Description : Embed a list of words with an Ollama embedding model and group them into clusters with K-Means.
@Author : Le.Qing
@Create Time : 2025-05-08 10:28
"""
from langchain_ollama import OllamaEmbeddings
from sklearn.cluster import KMeans

def embeddingLLM(
    base_url: str = "http://localhost:11434",
    model: str = "chevalblanc/acge_text_embedding",
) -> "OllamaEmbeddings":
    """Create the Ollama embeddings client used by this script.

    Args:
        base_url: URL of the Ollama server to talk to. Defaults to the
            endpoint on localhost, port 11434.
        model: Name of the embedding model to request from the server.

    Returns:
        An ``OllamaEmbeddings`` instance configured with the given server
        URL and model name.
    """
    return OllamaEmbeddings(base_url=base_url, model=model)


# Turn each word into an embedding vector via the Ollama embedding model.
model = embeddingLLM()
texts = ['苹果', '尺子', '斑马', '大象', '铅笔', '菠萝', '橡皮擦', '西瓜', '老鼠']
output_embeddings = model.embed_documents(texts)

# n_clusters=3 is the number of categories the words are split into.
# random_state is pinned so centroid initialisation (and therefore the
# printed cluster ids) is the same on every run; n_init is set explicitly so
# the number of restarts does not depend on the installed scikit-learn
# version's default.
kmeans = KMeans(n_clusters=3, n_init=10, random_state=0)
kmeans.fit(output_embeddings)
label = kmeans.labels_

# labels_ is aligned with the input order, so pair each word with its cluster.
for text, cluster_id in zip(texts, label):
    print(f"cls({text}) = {cluster_id}")
"""
Sample output from one run (cluster ids are arbitrary labels; only the grouping is meaningful):
cls(苹果) = 2
cls(尺子) = 1
cls(斑马) = 0
cls(大象) = 0
cls(铅笔) = 1
cls(菠萝) = 2
cls(橡皮擦) = 1
cls(西瓜) = 2
cls(老鼠) = 0
"""